#include "cuda_runtime.h"
#include <cuda_fp16.h>

// Prototype of the host-callable launcher for the single-precision (float)
// cost-volume build. Only the declaration lives here; the definition is
// provided elsewhere.
//
// NOTE(review): the parameter meanings below are inferred from names alone --
// confirm against the implementation before relying on them:
//   left, right   presumably the left-view / right-view feature buffers
//   shift         presumably the disparity shift values
//   output        the only non-const buffer, so presumably the destination
//   batch_size, channels, height, width
//                 presumably the extents of the feature tensors
//   max_disp      presumably the number of disparity levels
//   downsample    presumably a scale factor applied to the disparities
//   stream        presumably the CUDA stream the work is issued on
// What the int return value signifies is not visible from this declaration.
int BuildCostVolumeKernelLaunch(
    const float* left,
    const float* right,
    const float* shift,
    float* output,
    const int batch_size,
    const int channels,
    const int height,
    const int width,
    const int max_disp,
    const int downsample,
    cudaStream_t stream);

// int BuildCostVolumeHalfKernelLaunch(const half* left,
//                                  const half* right,
//                                  const half* shift,
//                                  half * output,
//                                  const int batch_size,
//                                  const int channels,
//                                  const int height,
//                                  const int width,
//                                  const int max_disp,
//                                  const int downsample,
//                                  cudaStream_t stream);